from pyspark.sql import SparkSession
from pyspark.sql.functions import lit

if __name__ == '__main__':
    # Demo: DataFrame.cube() is the UNION ALL of every grouping-set level.
    spark = SparkSession.builder.appName("cube_learn").master("local[*]").getOrCreate()
    df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"])

    # cube("name", "age") is equivalent to:
    #   select name, age, count(1) count from A group by name, age
    #   union all select name, null as age, count(1) count from A group by name
    #   union all select null as name, age, count(1) count from A group by age
    #   union all select null as name, null as age, count(1) count from A
    df.cube("name", df.age).count().orderBy("name", "age").show()

    # Reproduce each grouping level by hand, in the same (name, age, count)
    # column order, for side-by-side comparison with the cube output above.
    df.groupby(['name', 'age']).count().select("name", "age", "count").show()
    # Use a real SQL NULL (lit(None)) instead of the string "null": cube()
    # emits true NULLs, and df.age is a long, so the placeholder columns must
    # match both value and type to mirror the UNION ALL shown above.
    df.groupby(['name']).count().withColumn("age", lit(None).cast("long"))\
        .select("name", "age", "count").show()
    df.groupby(['age']).count().withColumn("name", lit(None).cast("string"))\
        .select("name", "age", "count").show()
    df.groupby().count().withColumn("name", lit(None).cast("string"))\
        .withColumn("age", lit(None).cast("long"))\
        .select("name", "age", "count").show()
    spark.stop()
